This file loads data objects stored from simulations and creates Figures 2,3,4, and 5 from the manuscript. Libraries and code are sourced below.
library(tidyverse)
library(stringr)
library(viridis)
library(splines)
library(refund)
library(Matrix)
library(mvtnorm)
library(patchwork)
library(gridExtra)
library(pracma)
#library(ggthemes)
knitr::opts_chunk$set(echo = TRUE,
warning = FALSE,
message = FALSE,
fig.width = 9,
fig.height = 4,
fig.path = '../output/'
)
theme_set(theme_bw() + theme(legend.position = "bottom"))
source(here::here("preprocessing", "simulated_paw.R"))
source(here::here("preprocessing", "make_pffr.R"))
source(here::here("preprocessing", "estimate_flode.R"))
source(here::here("preprocessing", "utils.R"))
source(here::here("preprocessing", "search_alpha.R"))
Here we load and process simulation results.
Results generated by the file 20211227_simulations_flodeVSfhist.R. Compares flode model to historical functional regression across 9 different \(\alpha\) values.
files = list.files(here::here("output", "simulation_results"),
pattern = "fhist",
full.names = TRUE)
# load all results
results_fhist = c()
for(f in 1:length(files)){
load(files[f])
results_fhist = rbind(results_fhist, as_tibble(results))
}
results_fhist = results_fhist %>%
mutate(alpha = factor(alpha_true, levels = c(0.1,0.5,1,2,4,6,8,10,12),
labels = paste0("alpha = ", c(0.1,0.5,1,2,4,6,8,10,12)))) %>%
select(iter, scenario, alpha_true, alpha, contains("flode"), contains("fhist"), N, D, alpha_est, lambda_d_est,
lambda_b_est, maxiter)
files = list.files(here::here("output", "simulation_results"),
pattern = "flode_",
full.names = TRUE)
# load all results
results_flode = c()
for(f in 1:length(files)){
load(files[f])
results_flode = rbind(results_flode, as_tibble(results))
}
# combine with fhist results so you have N = 100 D = 50 scenario covered
results_flode = results_flode %>%
mutate(alpha = factor(alpha_true, levels = c(0.1,0.5,1,2,4,6,8,10,12),
labels = paste0("alpha = ", c(0.1,0.5,1,2,4,6,8,10,12)))) %>%
select(iter, scenario, alpha_true, alpha, alpha_est, N, D, time_flode, maxiter) %>%
bind_rows(., select(results_fhist, iter, scenario, alpha_true, alpha, alpha_est, N, D, time_flode, maxiter)) %>%
filter(alpha_true %in% c(0.5, 4, 12))
fig_simdata_RE in the manuscriptFirst simulate data under three different alpha values, \(\alpha \in (0.5, 4, 12)\). Other parameter settings are below.
simulated_data0.5 = simulate_flode(I = 100,
D = 50,
sigma_y0 = 5,
sigma = 0.1,
alpha = 0.5,
rand_int = TRUE,
lambda0 = 50,
seed = 8987)
simulated_data4 = simulate_flode(I = 100,
D = 50,
sigma_y0 = 5,
sigma = 0.1,
alpha = 4,
rand_int = TRUE,
lambda0 = 50,
seed = 8987)
simulated_data12 = simulate_flode(I = 100,
D = 50,
sigma_y0 = 5,
sigma = 0.1,
alpha = 12,
rand_int = TRUE,
lambda0 = 50,
seed = 8987)
Collect values and surfaces into one dataset.
dat = simulated_data0.5$data %>%
select(trial, time, alpha_0.5 = value) %>%
mutate(alpha_4 = simulated_data4$data$value,
alpha_12 = simulated_data12$data$value)
t = s = seq(0, 1, length.out = 50)
surface0.5 = simulated_data0.5$surface %>% as_tibble() %>%
mutate(t = t) %>% select(t, everything()) %>%
gather(s, value, starts_with("s")) %>%
mutate(s = as.numeric(str_remove(s, "s"))) %>%
mutate(value = ifelse(s <= t, value, NA)) %>%
mutate(alpha = "alpha_0.5")
surface4 = simulated_data4$surface %>% as_tibble() %>%
mutate(t = t) %>% select(t, everything()) %>%
gather(s, value, starts_with("s")) %>%
mutate(s = as.numeric(str_remove(s, "s"))) %>%
mutate(value = ifelse(s <= t, value, NA)) %>%
mutate(alpha = "alpha_4")
surface12 = simulated_data12$surface %>% as_tibble() %>%
mutate(t = t) %>% select(t, everything()) %>%
gather(s, value, starts_with("s")) %>%
mutate(s = as.numeric(str_remove(s, "s"))) %>%
mutate(value = ifelse(s <= t, value, NA)) %>%
mutate(alpha = "alpha_12")
surface_df = bind_rows(surface0.5, surface4, surface12)
values = dat %>%
gather(alpha, value, starts_with("alpha")) %>%
mutate(alpha = str_replace(alpha, "alpha_", "alpha = "),
alpha = factor(alpha),
alpha = fct_relevel(alpha, "alpha = 0.5", "alpha = 4", "alpha = 12")) %>%
ggplot(aes(time, value, group = trial)) +
geom_line(alpha = 0.25) +
labs(y = "Y(t)") +
facet_wrap(~ alpha)
surfaces = surface_df %>%
mutate(alpha = str_replace(alpha, "alpha_", "alpha = "),
alpha = factor(alpha),
alpha = fct_relevel(alpha, "alpha = 0.5", "alpha = 4", "alpha = 12")) %>%
ggplot(aes(s, t)) +
geom_tile(aes(fill = value, col = value)) +
scale_fill_viridis_c() +
scale_colour_viridis_c() +
facet_wrap(~ alpha) +
theme(legend.position = "bottom")
other = simulated_data0.5$data %>%
select(trial, time, "forcing functions" = x, "random effects" = re) %>%
gather(param, value, "forcing functions":"random effects") %>%
mutate(param = factor(param),
param = fct_relevel(param, "forcing functions", "random effects")) %>%
ggplot(aes(time, value, group = trial)) +
geom_line(alpha = 0.25) +
facet_wrap(~ param, scales = "free")
other + values + surfaces + plot_layout(ncol = 1)
flode and fhist comparison on one simulated dataset with \(\alpha\) = 4.
fig_sim_results in the manuscriptdat = simulated_data4$data %>%
mutate(int = 1)
alpha_start = search_alpha(data = dat, Kt = 20)$best_alpha
## run flode
flode_results = estimate_flode(dat,
Kt = 20,
alpha0 = 4.1,
max_iter = 200,
forcing_functions = c("int", "x"),
tol = 0.00001,
lambda_b = 100,
lambda_d = 100,
random_effects = TRUE,
initial_position = TRUE,
y0_vec = simulated_data4$y0)
## fhist
fhist_results_re = make_pffr(dat, random_int = TRUE)
Organize the results
delta_fhist = coef(fhist_results_re, n1 = 50, n2 = 50)$smterms$"s(trial)"$coef %>%
as_tibble() %>%
select(trial = trial, time = t.vec, lambda = value) %>%
mutate(trial = as.numeric(trial)) %>%
arrange(trial) %>%
pull(lambda)
## using seWithMean for s(t.vec) .
dat = simulated_data4$data %>%
mutate(yhat_flode = flode_results$data$yhat,
resid_flode = value - yhat_flode,
yhat_fhist = fhist_results_re$fitted.values,
resid_fhist = value - yhat_fhist,
delta_flode = flode_results$data$deltaStar,
delta_fhist = delta_fhist)
surface_flode = flode_results$surface[[2]] %>% as_tibble() %>%
mutate(t = t) %>% select(t, everything()) %>%
gather(s, value, starts_with("s")) %>%
mutate(s = surface4$s) %>%
mutate(value = ifelse(s <= t, value, NA)) %>%
mutate(type = "flode")
fhist_surface_re = coef(fhist_results_re, n1 = 50, n2 = 50)$smterms$"ff(x,s)"$coef %>%
select(s = x.smat, t = x.tmat, value = value) %>% as_tibble()
## using seWithMean for s(t.vec) .
# interpolate surface so it is on the same grid as flode
interp_obj = list(x = unique(fhist_surface_re$s), y = unique(fhist_surface_re$t),
z = matrix(fhist_surface_re$value, 50, 50, byrow = TRUE))
loc_mat = list(x = s, y = t)
surface_re = fields::interp.surface.grid(interp_obj, loc_mat)$z
surface_re[50,] = surface_re[50-1,] # carry last value forward so not NA
surface_fhist = surface_re * lower.tri(surface_re, diag = TRUE)
colnames(surface_fhist) = colnames(simulated_data4$surface)
surface_fhist = surface_fhist %>%
as_tibble() %>%
mutate(t = t) %>% select(t, everything()) %>%
gather(s, value, starts_with("s")) %>%
mutate(s = as.numeric(str_remove(s, "s"))) %>%
mutate(value = ifelse(s <= t, value, NA)) %>%
mutate(type = "fhist")
surface_df = bind_rows(mutate(surface4, type = "truth"), surface_fhist, surface_flode)
fitted = dat %>%
gather(method, yhat_value, starts_with("yhat")) %>%
mutate(method = ifelse(method == "yhat_fhist", "yhat fhist", "yhat flode")) %>%
ggplot(aes(time, value, group = trial)) +
geom_line(alpha = 0.5, color = "gray") +
geom_line(aes(y = yhat_value), color = "darkred", alpha = 0.15) +
labs(y = "Y(t)") +
facet_wrap(~ method)
resid = dat %>%
gather(method, resid_value, starts_with("resid")) %>%
ggplot(aes(time, resid_value)) + geom_point(alpha = 0.2) +
facet_wrap(~ method)
delta = dat %>%
gather(method, delta_value, starts_with("delta")) %>%
mutate(method = ifelse(method == "delta_fhist", "fhist random effects", "flode random effects")) %>%
ggplot(aes(time, reStar, group = trial)) + geom_line(alpha = 0.5, color = "gray") +
geom_line(aes(y = delta_value), color = "darkred", alpha = 0.15) +
labs(y = "random effects, data scale") +
facet_wrap(~ method)
surfaces = surface_df %>%
#mutate(s = round(s, 2), t = round(t, 2)) %>%
ggplot(aes(s, t)) +
geom_tile(aes(fill = value, col = value)) +
scale_fill_viridis_c() +
scale_colour_viridis_c() +
facet_wrap(~type) +
theme(legend.position = "bottom")
fitted + delta + surfaces + plot_layout(ncol = 1)
fig_sim_surfaceErr in the manuscripta = results_fhist %>%
pivot_longer(c(surfaceErr_flode, surfaceErr_fhist), names_to = "model", values_to = "surface_error") %>%
mutate(model = str_remove(model, "surfaceErr_")) %>%
mutate(logsurface_error = log(surface_error)) %>%
ggplot(aes( x = factor(alpha_true), y = logsurface_error, group = interaction(factor(alpha_true), model), fill = model)) +
geom_boxplot()+
labs(x = "true alpha", y = "log surface error", title = "log surface errors") +
theme(legend.position = c(.7, .55),
legend.title = element_blank())
b = results_fhist %>%
pivot_longer(c(sigma_flode, sigma_fhist), names_to = "model", values_to = "sigma") %>%
mutate(model = str_remove(model, "sigma_")) %>%
ggplot(aes( x = factor(alpha_true), y = sigma, group = interaction(factor(alpha_true), model), fill = model)) +
geom_boxplot()+
geom_hline(yintercept = 0.10, linetype = 2) +
labs(x = "true alpha", y = "estimated sigma", title = "sigma") +
theme(legend.position = "none")
a + b
Show alpha estimation accuracy across 4 sample sizes for 3 different alpha values. Estimation improves as sample size increases as one would expect.
fig_sim_alpha-1.png in the manuscript .tex filea = results_fhist %>%
#filter(!(alpha_true %in% c(0.5, 4, 12))) %>%
ggplot(aes(y = alpha_est, fill = alpha)) +
geom_boxplot()+
scale_fill_viridis(discrete = TRUE) +
geom_hline(aes(yintercept = alpha_true), linetype = 2) +
facet_wrap(~ alpha_true, scales = "free", nrow = 1,
strip.position = "bottom" ) +
labs(x = "true alpha", y = "estimated alpha") +
theme_minimal() +
theme(legend.position = "none",
axis.ticks.x = element_blank(),
axis.text.x = element_blank())
b = results_flode %>%
ggplot(aes(x = factor(N), y = alpha_est, group = factor(N), fill = alpha)) +
geom_boxplot()+
scale_fill_viridis(discrete = TRUE) +
geom_hline(aes(yintercept = alpha_true), linetype = 2) +
facet_wrap(~ alpha, scales = "free", nrow = 1) +
theme_minimal() +
labs(x = "number of trials", y = "estimated alpha") +
theme(legend.position = "none")
a/b + plot_layout(heights = c(1, 2))